# Cleans the results from fitting unit elec gen model
library(pacman)
p_load(
  here, fst, data.table, purrr, janitor, ggplot2, lubridate, stringr
)

# Regional load data
nerc_load_dt = 
  read.fst(
    path = here("Data/electricity-generation/clean-load/nerc-load-dt.fst"),
    as.data.table = TRUE
  )

# Reading the coef files 
unit_gen_coef_dt = 
  read.fst(
    path = here("Data/electricity-generation/unit-model-fit/unit-gen-coef-dt.fst"),
    as.data.table = TRUE
  )
# Unit info table
unit_info_dt = 
  read.fst(
    here("Data/electricity-generation/unit-info-dt.fst"),
    as.data.table = TRUE
  )

# How many coefs are negative? 
coef_dt = 
  map_dfr(
    list.files(
      here("Data/electricity-generation/unit-model-fit/coefs"), 
      full.names = TRUE
    ),
    \(x){read.fst(x, as.data.table= TRUE)}
  ) %>% 
  .[,.(
    orispl_code, unitid,
    coef_nerc = str_extract(rn, "(?<=_)cal|mro|npcc|rfc|serc|tre|wecc|Intercept|Log\\(scale\\)"),
    coef_sq = str_detect(rn, "sq"),
    estimate,
    rn
  )] |>
  dcast(
    orispl_code + unitid + coef_nerc ~ coef_sq,
    value.var = "estimate",
    fill = NA
  ) |>
  setnames(
    old = c('FALSE','TRUE'),
    new = c('coef','coef_sq')
  ) 
  
  
coef_dt[
  is.na(coef_sq) &
  !(coef_nerc %in% c("Intercept","Log(scale)")),
  .(tot_pct = sum(coef < 0)/.N)
  #,by = coef_nerc
]

# Inflection points of polynomials 
coef_dt[!is.na(coef_sq), .(
  pct_concave = sum(coef_sq < 0)/.N
)]

coef_dt[, max := ifelse(coef_sq < 0 , -coef/(2*coef_sq), NA)]


# Finding the percentile of the maximum 
inflection_point_p = 
  ggplot( data = 
    merge(
      nerc_load_dt[!is.na(nerc_adj),.(
        probs = seq(0.005,0.995, by = 0.005),
        quant = quantile(excess_load, probs = seq(0.005,0.995, by = 0.005))
      ), by = nerc_adj],
      coef_dt[!is.na(max), .(orispl_code, unitid, nerc_adj = str_to_upper(coef_nerc), max)],
      by = 'nerc_adj',
      allow.cartesian = TRUE
    )[max > quant, .(percentile = max(probs)),by = .(orispl_code, unitid, nerc_adj)],
    aes(x = percentile)
  ) + 
  geom_histogram(binwidth = 0.005) +
  labs(
    x = "Percentile of NERC Excess Load",
    y = "Count"
  ) + 
  theme_minimal()
ggsave(
  plot = inflection_point_p,
  filename = here('figures/electricity-generation/inflection-point-distr.pdf'),
  device = cairo_pdf,
  width = 7, height = 5
)


# Reading the electricity generation files (with fitted values)
elec_gen_dt=
  read.fst(
    path = here("Data/electricity-generation/unit-model-fit/elec-gen-fit-dt.fst"),
    as.data.table = TRUE
  ) |>
  merge(
    unit_info_dt,
    by = c('orispl_code','unitid')
  )

# Checking if any exceed the namepcap or less than zero
elec_gen_dt[
  fit_gload > namepcap | 
              fit_gload < 0,
  .N, keyby = .(orispl_code, unitid
)]

#elec_gen_dt=
#  read.fst(
#    path = here("Data/electricity-generation/elec-gen-dt.fst"),
#    as.data.table = TRUE
#  )
#
#unique(elec_gen_dt[,.(orispl_code, unitid)])[,.N]
#elec_gen_dt[,sum(gload_mw,na.rm = TRUE)]

# Summarizing production by region
nerc_gen_dt = 
  elec_gen_dt[,.(
    tot_gload = sum(gload_mw),
    tot_fit_gload = sum(fit_gload)), 
    keyby = .(nerc_adj, utc_time)
  ]

# Fit for 
ggplot(nerc_gen_dt, aes(x = tot_gload, y = tot_fit_gload)) + 
  geom_smooth() + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  facet_wrap(~nerc_adj, scales = "free") + 
  labs(
    x = "Actual Production (MW)",
    y = "Predicted Production (MW)"
  ) + 
  theme_minimal()

#ex_production_month_hr_p = 
#  rbind(
#    elec_gen_dt[orispl_code == '10' & unitid == "CT10",.(utc_time, gload_mw_tot, type = "Actual")],
#    elec_gen_dt[orispl_code == '10' & unitid == "CT10",.(utc_time, gload_mw_tot = fit_gload, type = #"Predicted")]
#  ) |>
#  ggplot(aes(x = hour(utc_time), y = gload_mw_tot, color = type)) + 
#  geom_jitter(alpha = 0.05)+
#  geom_smooth(se = FALSE) + 
#  scale_color_viridis_d(
#    name = "", begin= 0.25, end = 0.75,option = "magma"
#  ) +
#  facet_wrap(~month(utc_time, label = TRUE)) + 
#  labs(
#    x = "Hour (UTC)",
#    y = "Electricity Production (MW)"
#  ) +
#  ylim(200,850) +
#  theme_minimal()
#ggsave(
#  plot = ex_production_month_hr_p,
#  filename = here("figures/electricity-generation/example-fitted-hr-generation.pdf"),
#  device = cairo_pdf,
#  width = 8, height = 6
#)

production_month_hr_p = 
  rbind(
    elec_gen_dt[nerc_adj == 'WECC',.(utc_time, gload_mw, type = "Actual")],
    elec_gen_dt[nerc_adj == 'WECC',.(utc_time, gload_mw = fit_gload, type = "Predicted")]
  ) |>
  ggplot(aes(x = hour(utc_time), y = gload_mw, color = type)) + 
  #geom_jitter(alpha = 0.05)+
  geom_smooth(se = FALSE) + 
  scale_color_viridis_d(
    name = "", begin= 0.25, end = 0.75,option = "magma"
  ) +
  facet_wrap(~month(utc_time, label = TRUE)) + 
  #facet_wrap(~region) + 
  labs(
    x = "Hour (UTC)",
    y = "Electricity Production (MW)"
  ) +
  theme_minimal()
ggsave(
  plot = production_month_hr_p,
  filename = here("figures/electricity-generation/fitted-hr-generation.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)


# How does excess load compare to aggregate load in each BA  
load_vs_gload = 
  rbind(
    nerc_load_dt[,.(utc_time, nerc_adj, type = "Solar", load = load_solar)],
    nerc_load_dt[,.(utc_time, nerc_adj, type = "Wind", load = load_nd-load_solar)],
    nerc_load_dt[,.(utc_time, nerc_adj, type = "Hydro", load = load_hydro)],
    nerc_load_dt[,.(utc_time, nerc_adj, type = "Load", load)],
    nerc_load_dt[,.(utc_time, nerc_adj, type = "Fossil Fuels", load = load_ff)],
    nerc_gen_dt[,.(utc_time, nerc_adj, type = "CEMS Actual", load = tot_gload)],
    nerc_gen_dt[,.(utc_time, nerc_adj, type = "CEMS Predicted", load = tot_fit_gload)]
  )

load_region_p =
  load_vs_gload[!is.na(nerc_adj) & !str_detect(type,"CEMS") ] |>
  ggplot(aes(x = hour(utc_time), y = load, color = type)) +
  geom_smooth(se = FALSE) +
  facet_wrap(~nerc_adj, scales = "free") +
  theme_minimal() + 
  scale_color_brewer("", palette = "Dark2") + 
  labs(x = "Hour (UTC)", y = "Load (MW)") + 
  theme(legend.position = 'bottom')
ggsave(
  plot = load_region_p,
  filename = here("figures/electricity-generation/fitted-load-region.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)

# Giving an example week 
load_week_example_p =
  load_vs_gload[
    !is.na(nerc_adj) 
    & week(utc_time) == 40
    & year(utc_time) == 2019
    & str_detect(type,"CEMS"),
    .(load = sum(load)),
    by = .(nerc_adj, utc_time, type)
  ] |>
  ggplot(aes(x = utc_time, y = load, color = type)) +
  geom_line() +
  facet_wrap(~nerc_adj, scales = "free") +
  theme_minimal() + 
  scale_color_brewer("", palette = "Dark2") + 
  labs(x = "Time (UTC)", y = "Load (MW)") + 
  theme(legend.position = 'bottom')
ggsave(
  plot = load_week_example_p,
  filename = here("figures/electricity-generation/load-total-ex-week.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)



